2015-11-17

創用 CC 授權條款

首先

恭喜Taiwan R User Group 三歲了

據說

今天有殘酷擂台

所以在這特別的日子

當然要講一些 有的沒有的有趣的主題

DQMSL

是什麼?可以吃嗎?

DQMSL = 勇者鬥惡龍怪物仙境

當初看到這個遊戲

身為骨灰級的玩家眼淚都要掉下來了啊

不知荒廢了多少的青春

既然叫做怪物仙境

就是有很多怪物的意思

為了這次的活動

我就稍微爬了一下怪物的資料

Load libraries

library(rvest)
library(stringr)
library(data.table)
library(googleVis)
library(ca)
library(proxy)

Get urls

# Fetch the ranking page and collect the detail-page link of every monster
# listed on it.
ranking_pages = read_html("http://dqmsl-search.net/ranking/allsbjstatus?hide=&hides=,star1,star2,star3,star4")
urls_xpath = "/html/body/div[@class='mainh']/div[@class='mainc']/div[@class='ccol']/div[@class='mbox'][2]/div[@class='mboxb']/div/div[@class='innnerHideDiv']/div/div/a"
base_url = "http://dqmsl-search.net"
# The hrefs are site-relative, so de-duplicate them and prefix the host.
# paste0() has no `sep` argument; passing one only appended an empty string.
monster_urls = ranking_pages %>% 
    html_nodes(xpath = urls_xpath) %>%
    html_attr(name = "href") %>%
    unique() %>%
    (function(x) {paste0(base_url, x)}) 
monster_urls[1:3]
## [1] "http://dqmsl-search.net/monster/detail?no=501"
## [2] "http://dqmsl-search.net/monster/detail?no=521"
## [3] "http://dqmsl-search.net/monster/detail?no=543"

ETL is omitted.

Preprocessing

# Load the scraped monster table and derive the columns used by later charts.
monsters = fread("data/monsters.csv")
character_cols = c("id", "name", "rank", "system", "type")
# Every column that is not an identifier/label is converted to numeric.
numeric_cols = setdiff(names(monsters), character_cols)
monsters[, (numeric_cols) := lapply(.SD, as.numeric), .SDcols = numeric_cols]
monsters[, total := hp + mp + str + def + agi + int]
# The format has exactly two placeholders (zero-padded file name, alt text).
# A third, unused `name` argument used to be passed here, which sprintf()
# reports as a warning on recent R versions; the generated HTML is unchanged.
monsters[, icon := sprintf('<img src="img/icon/%s.gif" alt="%s" width="40">', 
                           str_pad(id, width = 6, pad = "0"), id)]
# Must run after `icon` is built: `name` is replaced by an HTML link here.
monsters[, name := sprintf("<a href='http://dqmsl-search.net/monster/detail?no=%s'>%s</a>",
                           id, name)]
monsters[, `:=`(like_percent = like / view * 100, 
                hate_percent = hate / view * 100)]
# Strongest monsters (by summed stats) first.
monsters = monsters[order(total, decreasing = TRUE),]

Monsters' Data

# Paged table of the full monster data, five rows per page.
mtable = gvisTable(
    monsters,
    options = list(page = "enable", pageSize = 5)
)
print(mtable, tag = "chart")

Plot categorical data with barchart

# Print a googleVis bar chart showing the share (in percent) of rows of `dt`
# that fall into each value of the column named by `colname`.
#   dt      data.table to summarise
#   colname name of the grouping column, as a string
#   height, width  chart size passed to the chart options
print_bar = function(dt, colname, height = 400, width = 900) {
    # Row count per group first ...
    shares = dt[, .(percent = .N), by = colname]
    # ... then rescale the counts so they sum to 100.
    shares[, percent := percent / sum(percent) * 100]
    chart = gvisBarChart(shares,
                         options = list(height = height, width = width))
    print(chart, "chart")
}

Rank distribution

# Share of monsters per rank.
print_bar(dt = monsters, colname = "rank")

System distribution

# Share of monsters per system.
print_bar(dt = monsters, colname = "system")

Type distribution

# Share of monsters per type.
print_bar(dt = monsters, colname = "type")

System vs Type

# Contingency table: monster system in rows, monster type in columns.
system_type = table(monsters[["system"]], monsters[["type"]])
print(system_type)
##             
##              万能 回復 攻撃 特殊 補助 防御 魔法
##   悪魔系        5    3   32   10   17    3   52
##   物質系        0    4   36    7   19   26   10
##   ドラゴン系    2    2   64    1    4   10    1
##   スライム系    6   11   24   10    5   11    3
##   ???系      5    1   21   12    0    0   14
##   ゾンビ系      3    2   27    1    7    4    6
##   自然系        7    6   27    5   19   20    7
##   転生系        0    0    0   35    0    0    0
##   魔獣系        5    3   70    6   21   10    7

Stacked barchart

# Convert the counts to row percentages (each system sums to 100), rounded to
# two decimals, and flatten the table into long format.
system_type_dt = system_type %>%
    prop.table(margin = 1) %>%
    (function(p) round(p * 100, 2)) %>%
    as.data.table()
# Supplying only the new names renames every column in order.
setnames(system_type_dt, c("system", "type", "count"))
# One row per system, one column per type.
system_type_dt = dcast(system_type_dt, system ~ type, value.var = "count")
yvar = names(system_type_dt)[names(system_type_dt) != "system"]
print(
    gvisBarChart(system_type_dt, xvar = "system", yvar = yvar,
                 options = list(isStacked = TRUE, height = 300, width = 900)),
    tag = "chart"
)

Correspondence analysis

# Correspondence analysis of the system x type table.
cafit = ca(system_type)
# Row points (systems) and column points (types) share the Dim1 column; each
# group fills only its own value/tooltip columns and leaves the other group's
# columns NA.
ca_dt = rbindlist(list(
    data.table(Dim1 = cafit[["rowcoord"]][, 1],
               system = cafit[["rowcoord"]][, 2],
               system.html.tooltip = rownames(system_type),
               type = NA,
               type.html.tooltip = NA),
    data.table(Dim1 = cafit[["colcoord"]][, 1],
               system = NA,
               system.html.tooltip = NA,
               type = cafit[["colcoord"]][, 2],
               type.html.tooltip = colnames(system_type))
))
# Same tick marks on both axes.
tick_str = "{'ticks': [-5, -4, -3, -2, -1, 0, 1, 2] }"
ca_plot = gvisScatterChart(
    ca_dt,
    options = list(width = 500, height = 500,
                   hAxis = tick_str, vAxis = tick_str)
)

Correspondence analysis - 2

The total variance of the data matrix is measured by the inertia, which resembles a chi-square statistic but is calculated on relative observed and expected frequencies.

The cumulative percentage of inertia explained by the first two dimensions is about 80%.

# Show the principal inertias and the row/column summaries of the fit.
print(cafit)
## 
##  Principal inertias (eigenvalues):
##            1        2        3        4        5        6       
## Value      0.405606 0.186753 0.087494 0.044128 0.013726 0.005737
## Percentage 54.56%   25.12%   11.77%   5.94%    1.85%    0.77%   
## 
## 
##  Rows:
##            悪魔系   物質系 ドラゴン系 スライム系  ???系 ゾンビ系
## Mass     0.167353 0.139918   0.115226   0.096022  0.072702 0.068587
## ChiDist  0.870226 0.533945   0.757989   0.669843  0.723305 0.376019
## Inertia  0.126735 0.039890   0.066203   0.043084  0.038036 0.009698
## Dim. 1   0.005581 0.303270   0.624426  -0.109549 -0.626504 0.492531
## Dim. 2  -2.005154 0.456370   0.916871   0.717588 -0.951892 0.032618
##           自然系    転生系   魔獣系
## Mass    0.124829  0.048011 0.167353
## ChiDist 0.525387  2.716489 0.430770
## Inertia 0.034457  0.354288 0.031054
## Dim. 1  0.338404 -4.218845 0.402011
## Dim. 2  0.425708  0.824756 0.426592
## 
## 
##  Columns:
##              万能     回復     攻撃      特殊      補助     防御      魔法
## Mass     0.045267 0.043896 0.412894  0.119342  0.126200 0.115226  0.137174
## ChiDist  0.668747 0.912812 0.447466  1.718554  0.541708 0.752207  1.043109
## Inertia  0.020245 0.036575 0.082672  0.352467  0.037033 0.065197  0.149256
## Dim. 1   0.159083 0.238895 0.447837 -2.686864  0.445905 0.480385  0.046881
## Dim. 2  -0.240881 0.613545 0.387561  0.356418 -0.022284 0.986840 -2.401930

Correspondence analysis - 3

# Emit only the chart part of the googleVis object.
print(ca_plot, tag = "chart")

Weight

# Flat contingency table: weight in rows, rank in columns.
ftable(rank ~ weight, data = monsters) %>% print()
##        rank   A   B   C   D   E   F   S  SS
## weight                                     
## 2             0   0   0  71  38  15   0   0
## 3             0   0  80   0   0   0   0   0
## 6             0 112   0   0   0   0   0   0
## 9           171   0   0   0   0   0   0   0
## 14            0   0   0   0   0   0  61   0
## 18            0   0   0   0   0   0 110   0
## 23            0   0   0   0   0   0   9   0
## 27            0   0   0   0   0   0   0  53
## 32            0   0   0   0   0   0   0   9

Classical Multidimensional scaling

Here we take the SS-rank monsters and compute the distances between them based on hp, mp, str, def, agi and int.

# Classical MDS of the SS-rank monsters on their six standardised body stats.
body_cols = c("hp", "mp", "str", "def", "agi", "int")
scaled_body_cols = paste0("scaled_", body_cols)
ss = copy(monsters[rank %in% c("SS"), ])
# Larger icons for the tooltips of this chart.
ss[, icon := str_replace(icon, 'width=\"40\"', 'width=\"120\"')]
# scale() returns a one-column matrix carrying centre/scale attributes; strip
# it to a plain numeric vector so each new column is an ordinary column.
ss[, (scaled_body_cols) := lapply(.SD, function(x) as.vector(scale(x))),
   .SDcols = body_cols]
ss_dist = dist(ss[, scaled_body_cols, with = FALSE])

# Two-dimensional embedding; eig = TRUE also returns the GOF statistics.
fit = cmdscale(ss_dist, eig = TRUE, k = 2)
# Points are in the row order of `ss`, so ss$icon lines up with them.
plot_dt = data.table(Dim1 = fit$points[, 1],
                     Dim2 = fit$points[, 2],
                     Dim2.html.tooltip = ss$icon)

plot_out = gvisScatterChart(plot_dt, options=list(tooltip="{isHtml:'true'}",
                                                  width = 500, height = 500, 
                                                  legend = '{"position": "none"}'))

Classical Multidimensional scaling - 2

GOF measures the goodness of fit of the multidimensional scaling solution:

g.i = (sum{j=1..k} λ[j]) / (sum{j=1..n} T.i(λ[j])), where:

  • λ[j] are the eigenvalues (sorted in decreasing order)
  • T.1(v) = abs(v)
  • T.2(v) = max(v, 0)
# Goodness-of-fit values of the two-dimensional solution.
fit[["GOF"]]
## [1] 0.667616 0.667616

Classical Multidimensional scaling - 3

# Emit only the chart part of the googleVis object.
print(plot_out, tag = "chart")

Compute mean for each type

# Mean of each body stat per monster type among the SS-rank monsters.
ss_type_mean = ss[, lapply(.SD, mean), by = "type", .SDcols = body_cols]
# Reshape so that each stat is a row and each type is a column.
ss_type_mean = ss_type_mean %>%
    melt(id.vars = "type") %>%
    dcast(variable ~ type)
plot_out = gvisLineChart(
    ss_type_mean,
    xvar = "variable",
    options = list(height = 300, width = 800)
)
print(plot_out, tag = "chart")

Skills of monsters

The skills of the monsters can be found on this page.

# Load the skill list and attach each monster's rank, system and icon.
skills = fread("data/skills.csv")
# Keep only what sits between "(消費MP:" and the closing ")" of the skill text.
skills[, skill_mp := str_replace_all(skill, "^.*\\(消費MP:|\\)$", "")]
# monsters$name was turned into an HTML link earlier, so strip the tags to
# recover the plain monster name used as the join key.
skills = suppressWarnings(merge(
    x = skills,
    y = monsters[, .(monster = gsub("<a href=\\'.*\\'>|</a>", "", name),
                     rank, name, system, icon)],
    by = "monster",
    all.x = TRUE
))
stable = gvisTable(
    skills,
    options = list(page = "enable", pageSize = 5)
)

Skills Table

# Emit only the chart part of the skills table.
print(stable, tag = "chart")

Skills type

# Share of skill rows per skill type.
print_bar(dt = skills, colname = "skill_type")

Most common skills

# Number of occurrences of each skill, most frequent first.
skill_count = skills[, .(count = .N), by = "skill"]
skill_count = skill_count[order(-count)]
sctable = gvisTable(
    skill_count,
    options = list(page = "enable", pageSize = 10)
)
print(sctable, tag = "chart")

Classical Multidimensional scaling via skill

# Classical MDS of the SS-rank monsters based on which skill types they have.
ss_skills = copy(skills[rank %in% c("SS"),])
# Monster x skill-type count table; its rows are ordered by monster name.
monster_skill = table(ss_skills$monster, ss_skills$skill_type)
# (A duplicate, never-used `skills_dist` computed from the same call was dropped.)
ss_skills_dist = dist(monster_skill, method = "jaccard")

fit = cmdscale(ss_skills_dist, eig = TRUE, k=2)
# The points follow the row order of `monster_skill`, not the order of `ss`
# (which is sorted by total stats), so look each icon up by monster name to
# keep every tooltip attached to its own point.
point_icons = ss_skills$icon[match(rownames(monster_skill), ss_skills$monster)]
# Same enlargement as applied to ss$icon for the stat-based MDS chart.
point_icons = str_replace(point_icons, 'width=\"40\"', 'width=\"120\"')
plot_dt = data.table(Dim1 = fit$points[,1],
                     Dim2 = fit$points[,2],
                     Dim2.html.tooltip = point_icons)
plot_out = gvisScatterChart(plot_dt, options=list(tooltip="{isHtml:'true'}",
                                                  width = 500, height = 500, 
                                                  legend = '{"position": "none"}'))

Classical Multidimensional scaling via skill - 2

The GOF is around 0.5.

# Goodness-of-fit values of the skill-based solution.
fit[["GOF"]]
## [1] 0.4727897 0.4954645

Classical Multidimensional scaling via skill - 3

# Emit only the chart part of the googleVis object.
print(plot_out, tag = "chart")

Correspondence analysis via skill

# Correspondence analysis of monster system x skill type for SS-rank monsters.
ss_skills = copy(skills[rank %in% "SS"])
# Note: despite its name, this table is system (rows) x skill type (columns).
monster_skill = table(ss_skills[["system"]], ss_skills[["skill_type"]])
cafit = ca(monster_skill)
# Row points (systems) and column points (skill types) share the Dim1 column;
# each group fills only its own value/tooltip columns and leaves the rest NA.
ca_dt = rbindlist(list(
    data.table(Dim1 = cafit[["rowcoord"]][, 1],
               system = cafit[["rowcoord"]][, 2],
               system.html.tooltip = rownames(monster_skill),
               skilltype = NA,
               skilltype.html.tooltip = NA),
    data.table(Dim1 = cafit[["colcoord"]][, 1],
               system = NA,
               system.html.tooltip = NA,
               skilltype = cafit[["colcoord"]][, 2],
               skilltype.html.tooltip = colnames(monster_skill))
))
# Same tick marks on both axes.
tick_str = "{'ticks': [-5, -4, -3, -2, -1, 0, 1, 2] }"
ca_plot = gvisScatterChart(
    ca_dt,
    options = list(width = 500, height = 500,
                   hAxis = tick_str, vAxis = tick_str)
)

Correspondence analysis via skill - 2

The cumulative percentage of inertia explained by the first two dimensions is about 56%.

# Show the principal inertias and the row/column summaries of the fit.
print(cafit)
## 
##  Principal inertias (eigenvalues):
##            1        2        3        4       5        6        7       
## Value      0.289277 0.219797 0.156037 0.10297 0.075914 0.044696 0.024312
## Percentage 31.68%   24.07%   17.09%   11.28%  8.31%    4.9%     2.66%   
## 
## 
##  Rows:
##            悪魔系   物質系 ドラゴン系 スライム系  ???系 ゾンビ系
## Mass     0.131148 0.114754   0.098361   0.073770  0.278689 0.081967
## ChiDist  0.973843 0.947616   1.372804   1.100903  0.552119 0.846174
## Inertia  0.124376 0.103046   0.185370   0.089409  0.084954 0.058689
## Dim. 1  -1.095803 0.797867   1.511370  -1.207212 -0.680161 0.110800
## Dim. 2  -0.851758 0.766243   1.809836   0.355047  0.304241 0.043428
##            自然系    魔獣系
## Mass     0.073770  0.147541
## ChiDist  1.624782  0.700549
## Inertia  0.194748  0.072409
## Dim. 1   2.028826  0.158290
## Dim. 2  -2.392724 -0.625370
## 
## 
##  Columns:
##          体技回復  体技攻撃  体技特殊 体技状態異常 体技補助アップ
## Mass     0.008197  0.040984  0.024590     0.024590       0.057377
## ChiDist  1.608799  1.003560  1.160973     1.200830       1.053975
## Inertia  0.021215  0.041276  0.033144     0.035459       0.063738
## Dim. 1  -1.264605 -0.091430 -0.744969    -1.522203       0.544751
## Dim. 2   0.648943  0.052891 -0.012008    -0.172969       1.863531
##          回復呪文 ブレス攻撃  攻撃呪文 斬撃攻撃 斬撃状態異常  特殊呪文
## Mass     0.049180   0.098361  0.180328 0.147541     0.295082  0.016393
## ChiDist  0.922279   0.961557  0.806062 0.551263     0.496474  1.812718
## Inertia  0.041833   0.090944  0.117165 0.044836     0.072734  0.053868
## Dim. 1  -1.180605   0.838671 -1.364697 0.009026     0.622830 -1.754572
## Dim. 2  -0.578102   0.538842 -0.416508 0.371013     0.017035  0.703128
##         ブレス状態異常 ブレス補助 補助アップ呪文 踊り状態異常
## Mass          0.008197   0.008197       0.024590     0.016393
## ChiDist       3.543382   3.027650       1.451372     2.020726
## Inertia       0.102914   0.075137       0.051799     0.066940
## Dim. 1        3.772144   2.810051       0.676350     2.033224
## Dim. 2       -5.103665   3.860368      -2.751457    -3.218788

Correspondence analysis via skill - 3

# Emit only the chart part of the googleVis object.
print(ca_plot, tag = "chart")

最後

在勇者鬥惡龍的世界,與怪物們一起冒險吧!輸入邀請碼「BLNEeQbw」即可獲得豪華獎勵!


















謝謝大家(有機會到這一頁嗎XD)